0.1 R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

0.2 Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

1 plot 1

library(gganimate)
## Loading required package: ggplot2
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
  animated = TRUE
  log = "x"
  #Load data
  student_ratio <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-07/student_teacher_ratio.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   edulit_ind = col_character(),
##   indicator = col_character(),
##   country_code = col_character(),
##   country = col_character(),
##   year = col_double(),
##   student_ratio = col_double(),
##   flag_codes = col_character(),
##   flags = col_character()
## )
  #Load annual GDP data/capita sourced from World Bank (https://data.worldbank.org/indicator/ny.gdp.pcap.cd?end=2017&start=1960)
  Annual_GDP    <- readr::read_delim("https://raw.githubusercontent.com/LiamDBailey/TidyTuesday/master/inst/extdata/GDP_data.csv", delim  = "\t")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   `Country Name` = col_character(),
##   `Country Code` = col_character(),
##   `Indicator Name` = col_character(),
##   `Indicator Code` = col_character(),
##   `2018` = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
  #Reshape GDP data so we have a year column
  Reshape_GDP <- Annual_GDP %>% 
    #Remove Indicator Name and Code, these are unimportant
    select(-3:-4) %>% 
    reshape2::melt(id.vars = 1:2, na.rm = TRUE, variable.name = "year", value.name = "GDP") %>% 
    #Rename columns to make them correspond between 2 datasets
    rename(country_GDP = `Country Name`, country_code = `Country Code`) %>% 
    #Mutate year into numeric
    mutate(year = as.integer(as.character(year)))
  
  #Subset to include student ratio data that also has GDP info
  student_ratio_GDP <- student_ratio %>% 
    filter(country_code %in% Reshape_GDP$country_code) %>% 
    left_join(Reshape_GDP, by = c("country_code", "year")) %>% 
    #Exclude those rows with no GDP
    filter(!is.na(GDP)) %>% 
    mutate(year = as.integer(year)) %>% 
    #Just look at primary education
    filter(indicator == "Primary Education")
  
  if(animated){
    
    #Find the best and worst student ratio
    worst <- student_ratio_GDP %>%
      group_by(country_code) %>% 
      summarise(mean_ratio = mean(student_ratio), n = n()) %>%
      filter(n == 6) %>% 
      arrange(mean_ratio) %>% 
      filter(row_number() %in% (n() - 1):n())
    
    best <- student_ratio_GDP %>%
      group_by(country_code) %>% 
      summarise(mean_ratio = mean(student_ratio), n = n()) %>%
      filter(n == 6) %>% 
      arrange(mean_ratio) %>%
      filter(row_number() %in% 1:2)
    
    animated_plot <- ggplot() +
      geom_point(data = student_ratio_GDP, aes(x = GDP, y = student_ratio, colour = country_code, size = GDP), alpha = 0.7)+
      ggrepel::geom_label_repel(data = filter(student_ratio_GDP, country_code %in% best$country_code),
                                aes(x = GDP, y = student_ratio, label = country), nudge_y = 5, segment.size = 0.5, family = "Ubuntu", size = 6)+
      ggrepel::geom_label_repel(data = filter(student_ratio_GDP, country_code %in% worst$country_code),
                                aes(x = GDP, y = student_ratio, label = country), nudge_x = 1, segment.size = 0.5, family = "Ubuntu", size = 6)+
      scale_colour_viridis_d()+
      labs(caption = "\nVisualisation by @ldbailey255 | GDP data: data.worldbank.org | Student ratio data: UNESCO",
           y = "Primary student-teacher ratio", x = "GDP per capita")+
      scale_y_continuous(limits = c(0, NA))+
      scale_size_continuous(range = c(3, 10))+
      theme_classic()+
      theme(title = element_text(family = "Ubuntu", colour = "black", size = 16, margin = margin(t = 10)),
            axis.text = element_text(family = "Ubuntu", size = 15, colour = "black"),
            axis.title.y = element_text(family = "Ubuntu", size = 18, colour = "black", margin = margin(r = 10)),
            axis.title.x = element_text(family = "Ubuntu", size = 18, colour = "black", margin = margin(t = 10)),
            legend.position = "none")+
      #Start gganimate code
      gganimate::transition_time(time = year)+
      labs(title = "Year: {frame_time}")+
      gganimate::shadow_mark(alpha = 0.25, wrap = FALSE, size = 2, exclude_layer = 2:3)+
      gganimate::ease_aes("linear")
    
    if(stringr::str_detect(log, "x")){
      
      animated_plot <- animated_plot +
        scale_x_log10()
      
    }
    
    if(stringr::str_detect(log, "y")){
      
      animated_plot <- animated_plot +
        scale_y_log10()
      
    }
    
    options(gganimate.dev_args = list(width = 600, height = 520))
    
    # gganimate::anim_save("./plots/07_05_19.gif", animation = animated_plot)
    # 
    # return(animated_plot)
    animated_plot
  }

#   } else {
#    
#     non_animated <- ggplot() +
#       geom_point(data = student_ratio_GDP, aes(x = GDP, y = student_ratio, colour = country_code, size = GDP), alpha = 0.7)+
#       scale_colour_viridis_d()+
#       labs(caption = "\nVisualisation by @ldbailey255 \n GDP data: data.worldbank.org | Student ratio data: UNESCO",
#            y = "Primary student-teacher ratio", x = "GDP per capita")+
#       scale_y_continuous(limits = c(0, NA))+
#       scale_size_continuous(range = c(3, 10))+
#       theme_classic()+
#       theme(title = element_text(family = "Ubuntu", colour = "black", size = 14, margin = margin(t = 10)),
#             axis.text = element_text(family = "Ubuntu", size = 14, colour = "black"),
#             axis.title.y = element_text(family = "Ubuntu", size = 17, colour = "black", margin = margin(r = 10)),
#             axis.title.x = element_text(family = "Ubuntu", size = 17, colour = "black", margin = margin(t = 10)),
#             legend.position = "none")
#     
#     if(stringr::str_detect(log, "x")){
#       
#       non_animated <- non_animated +
#         scale_x_log10()
#       
#     }
#     
#     if(stringr::str_detect(log, "y")){
#       
#       non_animated <- non_animated +
#         scale_y_log10()
#       
#     }
#     
#     ggsave(plot = non_animated, filename = "./plots/07_05_2019.png", width = 6, height = 5.2, dpi = 300)
#     
#     return(non_animated)
#     
#   }
#   
# 
# plot_07_05_19()
  # ipf_lifts <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-08/ipf_lifts.csv")
  # 
  # #We're going to look at the the top weightlifters, using a few different metrics
  # lifts_cleaned <- ipf_lifts %>% 
  #   #Create a unique name for each competition so we can work out the standardised placing in every comp
  #   dplyr::mutate(comp = paste(meet_name, lubridate::year(date), sep = "_")) %>% 
  #   #Remove SB events as they are essentially non-existent (2 lifts?)
  #   #Remove guest lifters as these individuals weren't competing
  #   dplyr::filter(event != "SB" & place != "G") %>% 
  #   #Make disqualifications and guest lifters into a large number (999) so they are given last place in any event
  #   #Determine the max lift in all three categories from an event
  #   dplyr::mutate(place_numeric = purrr::map_int(place, ~{
  #     
  #     if(..1 %in% c("DQ", "DD")){
  #       
  #       return(999L)
  #       
  #     } else {
  #       
  #       return(as.integer(..1))
  #       
  #     }
  #     
  #   }),
  #   max_lift = pmax(best3squat_kg, best3bench_kg, best3deadlift_kg, na.rm = TRUE))
  # 
  # #Go through every unique event at every competition (comp, event, division, weight class) and standardise the placing
  # #between -1 and 1. Gold medal will always be 1, last place will always be -1.
  # lifts_cleaned_std <- lifts_cleaned %>% 
  #   dplyr::group_by(comp, event, sex, division, weight_class_kg) %>% 
  #   dplyr::arrange(desc(place_numeric), .by_group = TRUE) %>% 
  #   dplyr::mutate(place_std = seq(-1, 1, length.out = n()), sum_place = sum(place_numeric, na.rm = TRUE), sum_place_expected = sum(1:n()),
  #                 DQs = any(place_numeric == 999L)) %>% 
  #   #Remove any cases where the number of places is not the same as expected (and can't be explained by DQs)
  #   #Having a quick look, these appear to be cases due to typos, missclassification etc.
  #   #We would need to fix these manually, for now just remove them
  #   #We lose a little under 1000 records from this
  #   dplyr::filter(sum_place == sum_place_expected | DQs == TRUE)
  # 
  # #For all individuals in males and females find best individuals in 4 categories:
  # #1. Most gold medals
  # #3. Max weight lifted over all competition types
  # #4. Longest career
  # GOAT <- lifts_cleaned_std %>% 
  #   dplyr::filter(!is.na(name)) %>% 
  #   dplyr::group_by(name) %>% 
  #   #N.B. We define career length as number of years in which they competed.
  #   #There are some individuals who had long careers but very few events
  #   dplyr::summarise(sex = first(sex), career_length = length(unique(lubridate::year(date))),
  #                    last_yr = lubridate::year(max(date, na.rm = TRUE)),
  #                    mean_place = mean(place_std, na.rm = TRUE), total_events = sum(!is.na(place_std)),
  #                    total_podiums = sum(place <= 3, na.rm = TRUE), perc_podiums = total_podiums/total_events,
  #                    total_golds = sum(place == 1, na.rm = TRUE), perc_golds = total_golds/total_events,
  #                    max_lift = max(max_lift, na.rm = TRUE))
  # 
  # #We only consider individuals who have participated in at least 10 events over their career
  # longest_career <- GOAT %>% 
  #   dplyr::filter(total_events > 10) %>% 
  #   dplyr::group_by(sex) %>% 
  #   dplyr::arrange(-career_length, .by_group = TRUE) %>% 
  #   slice(1)
  # most_medals    <- GOAT %>% 
  #   dplyr::filter(total_events > 10) %>% 
  #   dplyr::group_by(sex) %>% 
  #   dplyr::arrange(-perc_golds, -total_events, .by_group = TRUE) %>%
  #   slice(1)
  # highest_weight <- GOAT %>% 
  #   dplyr::filter(total_events > 10) %>% 
  #   dplyr::group_by(sex) %>% 
  #   dplyr::arrange(-max_lift, .by_group = TRUE) %>% 
  #   slice(1)
  # 
  # plot_data <- lifts_cleaned_std %>% 
  #   dplyr::filter(name %in% c(longest_career$name, most_medals$name, highest_weight$name)) %>% 
  #   #Create a event number column to compare individuals with different career time periods
  #   dplyr::arrange(name, date) %>% 
  #   dplyr::group_by(name) %>% 
  #   dplyr::mutate(event_nr = seq(1:n()),
  #                 cumsum_gold = cumsum(place == 1),
  #                 cumsum_podium = cumsum(place <= 3),
  #                 cumsum_place_std = cumsum(place_std)) %>% 
  #   dplyr::ungroup()
  # 
  # #Colour data
  # colour_palette <- tibble::tibble(colour = c("#8EA4D2", "#35678C", "#4C9F70", "#CA3C25", "#ED7D3A", "#E59F71")) %>% 
  #   #Join in related data for geom_segment
  #   dplyr::mutate(segment_data = list(filter(plot_data, name == highest_weight$name[1] & max_lift == highest_weight$max_lift[1]),
  #                                     filter(plot_data, name == highest_weight$name[2] & max_lift == highest_weight$max_lift[2]),
  #                                     filter(plot_data, name == most_medals$name[1]) %>% slice(n()),
  #                                     filter(plot_data, name == most_medals$name[2]) %>% slice(n()),
  #                                     filter(plot_data, name == longest_career$name[1]) %>% slice(n()),
  #                                     filter(plot_data, name == longest_career$name[2]) %>% slice(n() - 4)),
  #                 label_text = c(paste0("**<span style = 'color:", colour[1], "'>Bonica Brown</span> has the heaviest <br> lift of any women <br> (318kg)**"),
  #                                paste0("**In 2014 <span style = 'color:", colour[2], "'>David Lup", "\U00E1", "\U010D", "</span> <br> lifted 450kg. <br> He was only 22!**"),
  #                                paste0("**<span style = 'color:", colour[3], "'>Natalia Salnikova</span> <br> has won every event <br> she has entered but one <br> (she came second) <br>**"),
  #                                paste0("**In mens, <span style = 'color:", colour[4], "'>Sergey Fedosienko</span> <br> has never lost an event...**"),
  #                                paste0("**<span style = 'color:", colour[5], "'>Vuokko Viitasaari</span> has completed <br> 34 events, <br> more than any other woman**"),
  #                                paste0("**<span style = 'color:", colour[6], "'>Hiroyuki Isagawa</span> participated in 52 events. <br> He was 62 for his last event!**"))) %>% 
  #   tidyr::unnest(segment_data) %>% 
  #   dplyr::arrange(name)
  # 
  # title <- "Do you even lift?!"
  # subtitle1 <- "How do you pick the best powerlifter? \n"
  # # subtitle2 <- paste0("The heaviest lift? ", emo::ji("weight_lifting_man"), " The longest career?", emo::ji("spiral_calendar"),
  # # " The most gold medals?", emo::ji("1st_place_medal"))
  # subtitle3 <- "\n We look at the top male and female powerlifters using different metrics."
  # 
  # #For these top lifters, plot their weight lifts and standardised place over their career
  # weight_lifted <- ggplot() +
  #   geom_path(data = plot_data, aes(x = event_nr, y = max_lift, group = name, colour = name), size = 1, lineend = "round") +
  #   scale_colour_manual(values = colour_palette$colour) +
  #   scale_y_continuous(limits = c(0, 600), name = "Maximum weight lifted (kg)") +
  #   scale_x_continuous(limits = c(1, 60), breaks = seq(0, 60, 10), name = "Career event") +
  #   coord_equal(ratio = 0.8 * (25/600), clip = "off") +
  #   ggtext::geom_richtext(data = colour_palette %>% slice(1),
  #                 aes(x = event_nr + 10,
  #                     y = max_lift + 50, label = colour_palette$label_text[1]),
  #                 fill = NA, label.color = NA, family = "Ubuntu", colour = "white") +
  #   ggtext::geom_richtext(data = colour_palette %>% slice(2),
  #                 aes(x = event_nr,
  #                     y = max_lift + 80, label = colour_palette$label_text[2]),
  #                 fill = NA, label.color = NA, family = "Ubuntu", colour = "white") +
  #   ggtext::geom_richtext(data = colour_palette %>% slice(3),
  #                 aes(x = event_nr + 1,
  #                     y = max_lift + 90, label = colour_palette$label_text[3]),
  #                 fill = NA, label.color = NA, family = "Ubuntu", colour = "white") +
  #   ggtext::geom_richtext(data = colour_palette %>% slice(6),
  #                 aes(x = event_nr + 12,
  #                     y = max_lift - 60, label = colour_palette$label_text[6]),
  #                 fill = NA, label.color = NA, family = "Ubuntu", colour = "white") +
  #   theme_classic() +
  #   theme(legend.position = "none",
  #         plot.background = element_rect(fill = "#2E2E2E", colour = NA),
  #         panel.background = element_rect(fill = "#2E2E2E", colour = NA),
  #         axis.text = element_text(family = "Ubuntu", colour = "white"),
  #         axis.title = element_text(family = "Ubuntu", colour = "white"),
  #         axis.line = element_line(colour = "white", size = 1),
  #         axis.ticks = element_line(colour = "white"))
  # 
  # weight_lifted_gg <- ggplotGrob(weight_lifted)
  # 
  # placed <- ggplot() +
  #   geom_path(data = plot_data, aes(x = event_nr, y = cumsum_gold, group = name, colour = name), size = 1, lineend = "round") +
  #   scale_colour_manual(values = colour_palette$colour) +
  #   scale_x_continuous(limits = c(1, 60), breaks = seq(0, 60, 10), name = "Career event") +
  #   scale_y_continuous(limits = c(0, 25), name = "Total gold medals") +
  #   ggtext::geom_richtext(data = colour_palette %>% slice(4),
  #                 aes(x = event_nr - 6,
  #                     y = cumsum_gold + 5, label = colour_palette$label_text[4]),
  #                 fill = NA, label.color = NA, family = "Ubuntu", colour = "white") +
  #   ggtext::geom_richtext(data = colour_palette %>% slice(5),
  #                 aes(x = event_nr + 11,
  #                     y = cumsum_gold - 2, label = colour_palette$label_text[5]),
  #                 fill = NA, label.color = NA, family = "Ubuntu", colour = "white") +
  #   coord_equal(ratio = 0.8, clip = "off") +
  #   theme_classic() +
  #   theme(legend.position = "none",
  #         plot.background = element_rect(fill = "#2E2E2E", colour = NA),
  #         panel.background = element_rect(fill = "#2E2E2E", colour = NA),
  #         axis.text = element_text(family = "Ubuntu", colour = "white"),
  #         axis.title = element_text(family = "Ubuntu", colour = "white"),
  #         axis.line = element_line(colour = "white", size = 1),
  #         axis.ticks = element_line(colour = "white"))
  # 
  # placed_gg <- ggplotGrob(placed)
  # 
  # #Create plot with other plots placed around
  # final_plot <- ggplot() +
  #   scale_x_continuous(limits = c(0, 1)) +
  #   scale_y_continuous(limits = c(0, 1)) +
  #   annotate("text", label = title, family = "Alfa Slab One", x = 0.5, y = 0.95, size = 10, colour = "white") +
  #   annotate("text", label = paste(subtitle1, 
  #                                  # subtitle2,
  #                                  subtitle3),
  #            family = "Open Sans Semibold", x = 0.5, y = 0.85, size = 6, colour = "white") +
  #   annotation_custom(weight_lifted_gg, xmin = 0.05, xmax = 0.95, ymin = 0.4, ymax = 0.75) +
  #   annotation_custom(placed_gg, xmin = 0.05, xmax = 0.95, ymin = 0, ymax = 0.35) +
  #   labs(caption = "\nVisualisation by @ldbailey255 | Data: openpowerlifting.org") +
  #   theme(legend.position = "none",
  #         plot.background = element_rect(fill = "#2E2E2E", colour = NA),
  #         panel.background = element_rect(fill = "#2E2E2E", colour = NA),
  #         axis.text = element_blank(),
  #         axis.title = element_blank(),
  #         axis.line = element_blank(),
  #         axis.ticks = element_blank(),
  #         panel.grid = element_blank(),
  #         plot.caption = element_text(family = "Ubuntu", colour = "white"))
  # 
  # 
  # # ggsave(final_plot, filename = "./plots/08_10_19.png", height = 10, width = 10, dpi = 600)
  # final_plot
  # 
## packages
library(tidyverse)
library(stringr)
library(rcartocolor)
library(ggtext)
library(ragg)
library(colorspace)
library(glue)
library(pdftools)

theme_set(theme_void(base_family = "Overpass", base_size = 10))

theme_update(
  axis.text.x = element_text(color = "grey60", margin = margin(t = 2)),
  panel.spacing.y = unit(0, "lines"),
  panel.spacing.x = unit(3, "lines"),
  axis.line.x = element_line(color = "grey60"),
  axis.ticks.x = element_line(color = "grey60"),
  axis.ticks.length.x = unit(.4, "lines"),
  plot.title = element_text(family = "Alegreya Sans SC Light", size = 84, 
                            color = "grey10", hjust = 0, 
                            margin = margin(15, 0, 30, 0)),
  plot.subtitle = element_markdown(family = "Alegreya Sans", size = 25,
                                   color = "grey30", lineheight = 1.2,
                                   hjust = 0, margin = margin(0, 0, 30, 0)),
  plot.caption = element_markdown(family = "Alegreya Sans SC", color = "grey30", 
                                  size = 19, face = "italic", 
                                  hjust = .5, margin = margin(60, 0, 0, 0)),
  plot.title.position = "plot",
  plot.caption.position = "plot",
  plot.margin = margin(60, 120, 45, 90),
  plot.background = element_rect(color = "white", fill = "white"),
  strip.text = element_blank()
)
df_ncaa <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-10-06/tournament.csv')
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   year = col_double(),
##   school = col_character(),
##   seed = col_double(),
##   conference = col_character(),
##   conf_w = col_double(),
##   conf_l = col_double(),
##   conf_percent = col_double(),
##   conf_place = col_character(),
##   reg_w = col_double(),
##   reg_l = col_double(),
##   reg_percent = col_double(),
##   how_qual = col_character(),
##   x1st_game_at_home = col_character(),
##   tourney_w = col_double(),
##   tourney_l = col_double(),
##   tourney_finish = col_character(),
##   full_w = col_double(),
##   full_l = col_double(),
##   full_percent = col_double()
## )
## To get the points for each team/tournament season you can multiply the teams 
## initial seed (1 - 16) by the assigned points as defined in the table of 100 - 0.
conv <- 
  tribble(
    ~seed, ~points,
    1,  100,
    2,  72.7,
    3,  54.5,
    4,  48.5,
    5,  33.3,
    6,  33.3,
    7,  27.3,
    8,  21.2,
    9,  18.2,
    10, 18.2,
    11, 18.2,
    12, 15.2,
    13, 9.09,
    14, 6.06,
    15, 3.03,
    16, 0
  )

df_ncaa_cum <-
  df_ncaa %>% 
  left_join(conv, by = "seed") %>% 
  dplyr::select(year, school, seed, points) %>% 
  group_by(school) %>% 
  complete(year = full_seq((min(df_ncaa$year, na.rm = TRUE) - 1):(max(df_ncaa$year, na.rm = TRUE) + 1), 1)) %>% 
  arrange(year) %>%
  mutate(
    points_filled = if_else(is.na(points), 0, points),
    points_na = if_else(is.na(points), 0, NA_real_),
    cumsum_filled = cumsum(points_filled),
    cumsum = if_else(is.na(points), NA_real_, cumsum_filled),
    cumsum_na = if_else(is.na(points), cumsum_filled, NA_real_),
    cumsum_na = if_else(year == min(year) | year == max(year), NA_real_, cumsum_na)
  ) %>% 
  group_by(year) %>% 
  mutate(
    exp = (year - 1981) * 50,
    diff_exp = cumsum_filled - exp,
    school = if_else(school == "LSU", "Louisiana State", school),
    school = str_replace(school, "St\\.", "State")
  ) %>% 
  ungroup()

df_ncaa_top <-
  df_ncaa_cum %>% 
  group_by(school) %>% 
  filter(sum(!is.na(points)) > 9) %>% 
  filter(year == 2018) %>% 
  mutate(latest = cumsum_filled) %>% 
  ungroup() %>% 
  arrange(-latest) %>%
  mutate(id = row_number()) %>% 
  filter(id <= 30) %>% 
  dplyr::select(school, latest, id)

df_ncaa_sub <-
  df_ncaa_cum %>% 
  filter(school %in% df_ncaa_top$school) %>% 
  left_join(df_ncaa_top) %>% 
  mutate(school = fct_reorder(school, id))
## Joining, by = "school"
df_ncaa_sub %>% 
  ggplot(aes(year, cumsum_filled)) +
    ## grey shading expected score
    geom_rect(
      data = df_ncaa_sub %>% filter(year < max(df_ncaa_sub$year)),
      aes(xmin = year, xmax = year + 1,
          ymin = 0, ymax = exp),
      fill = "grey92"
    ) +
    ## color gradient difference cumulative sum <> expected score
    geom_rect(
      data = df_ncaa_sub %>% filter(year < max(df_ncaa_sub$year)),
      aes(xmin = year, xmax = year + 1,
          ymin = exp, ymax = cumsum_filled,
          fill = diff_exp), alpha = .8
    ) +
    ## line expected score
    geom_step(
      data = df_ncaa_sub %>% 
        mutate(exp = if_else(year == 2019, 1850, exp)),
      aes(y = exp), 
      color = "grey62",
      size = .4
    ) + 
    ## line cumulative sum
    geom_step(
      color = "black",
      size = .6
    ) +
    ## point cumulative sum conference participation
    geom_point(
      aes(y = cumsum), 
      color = "black", 
      size = 1.1
    ) +
    ## point cumulative sum no participation
    geom_point(
      aes(y = cumsum_na), 
      shape = 21, 
      color = "black", 
      fill = "white", 
      size = 1.3,
      stroke = .3
    ) +
    ## indicator difference 2018
    geom_linerange(
      data = df_ncaa_sub %>% 
        filter(year == max(df_ncaa_sub$year) - 1) %>% 
        mutate(p = if_else(cumsum_filled > 1850, cumsum_filled - 75, cumsum_filled + 75)),
      aes(x = 2019.6, ymin = exp, ymax = p, 
          color = diff_exp, 
          color = after_scale(darken(color, .25, space = "HLS")))
    ) +
    ## label cumulative sum 2018
    geom_text(
      data = df_ncaa_sub %>% 
        filter(year == max(df_ncaa_sub$year)),
      aes(label = round(cumsum_filled, 0), 
          color = diff_exp, 
          color = after_scale(darken(color, .25, space = "HLS"))),
      family = "Overpass Mono", 
      fontface = "bold", 
      size = 4.3, 
      hjust = 0, 
      nudge_x = .2
    ) +
    ## label difference 2018
    geom_text(
      data = df_ncaa_sub %>% 
        filter(year == max(df_ncaa_sub$year)) %>% 
        mutate(lab = if_else(diff_exp < 0, 
                             glue("{round(diff_exp, 0)}"), 
                             glue("+{round(diff_exp, 0)}"))),
      aes(y = exp + diff_exp / 2, 
          label = lab, 
          color = diff_exp, 
          color = after_scale(darken(color, .25, space = "HLS"))),
      family = "Overpass Mono", 
      size = 3.3, 
      hjust = 0, 
      nudge_x = .9
    ) +
    ## label school, partcipation + mean score
    geom_richtext(
      data = df_ncaa_sub %>% 
        group_by(school) %>% 
        mutate(lab = glue("<b style='font-size:27pt;'>{school}</b><br><br><span style='font-size:14pt;font-family:overpass;'>Conferences: {sum(!is.na(points))}<br>Mean score: {round(latest / n() - 2, 1)}</span>")) %>% 
        filter(cumsum_filled > 1400 | exp > 1400) %>% 
        slice(1),
      aes(x = year - 9, 
          y = 1800, 
          label = lab, 
          color = latest - 1850, 
          color = after_scale(darken(color, .3, space = "HLS"))),
      family = "Alegreya Sans SC ExtraBold", 
      size = 1, 
      lineheight = .9,
      label.color = NA,
      fill = NA
    ) +
    facet_wrap(~school, scales = "free_x", ncol = 6) +
    coord_cartesian(clip = "off") +
    scale_x_continuous(breaks = seq(1980, 2020, by = 5), expand = c(0, 0), limits = c(1980, 2020)) +
    scale_y_continuous(expand = c(.025, .025)) +
    scale_color_carto_c(
      palette = "Geyser", 
      direction = -1, 
      limits = c(-1255, 1255), 
      guide = "none"
    ) +
    scale_fill_carto_c(
      palette = "Geyser", 
      direction = -1, 
      limits = c(-1255, 1255), 
      guide = "none"
    ) +
    labs(
      title = "—  The Rise & Fall of Women’s College Basketball Dynasties  —", 
      subtitle = "A number of teams that were the titans of the early NCAA women’s basketball tournament have struggled in recent decades. And in their place, a new ruling class of schools has emerged to become the defining programs of<br>the modern age. FiveThirtyEight estimated the team strength over time based on NCAA Tournament seeds as a proxy in the absence of game-level data. To measure this, FiveThirtyEight awarded “seed points” in proportion to<br>a given seed number’s expected wins in the tournament, calibrated to a 100-point scale where the No. 1 seed gets 100 points, No. 2 gets 70 points, and so forth.<br><br>The visualization shows the cumulative sum of awarded seed points on a 100-point scale from the very first women’s NCAA basketball tournaments in 1982 until 2018 in comparison to a hypothetical team that participated in<br>all of the 37 conferences and gained half of the points each time (grey line). The curves highlight the fall of yesterday’s women’s basketball powerhouses such as **Louisiana Tech**, **Long Beach State**, **Southern California**, and **Old**<br>**Dominion** that have been very good throughout the history of the women’s tournament but have experienced big drop-offs in seed points over the last years. At the same time, schools such as **UConn**, **Stanford**, **Notre Dame**,<br>**Baylor**, and **Duke** started slow but picked up steam into the present day. Some teams, such as **Tennessee**, have been relatively consistent throughout the NCAA era gathering always more seed points then an average team.<br>*Shown are the top 30 college teams that participated in at least ten conferences between 1982 and 2018, sorted by the cumulative sum of seed points.*",
      caption = "Visualization by Cédric Scherer • Data by FiveThirtyEight"
    ) #+
## Warning: Duplicated aesthetics after name standardisation: colour

## Warning: Duplicated aesthetics after name standardisation: colour

## Warning: Duplicated aesthetics after name standardisation: colour

## Warning: Duplicated aesthetics after name standardisation: colour
## Warning: Removed 456 rows containing missing values (geom_point).
## Warning: Removed 774 rows containing missing values (geom_point).

    # ggsave(here::here("dev", glue::glue("2020_41___{format(Sys.time(), '%Y%m%d_%H%M%S')}.pdf")),
    #        width = 33, height = 30.5, device = cairo_pdf)
# https://www.data-imaginist.com/2020/insetting-a-new-patchwork-version/
# https://www.data-imaginist.com/2019/patch-it-up-and-send-it-out/
## packages
library(tidyverse)
library(colorspace)
library(ragg)
library(cowplot)
library(ggtext)
library(pdftools)

theme_set(theme_minimal(base_size = 15, base_family = "Neutraface Slab Display TT Bold"))

theme_update(
  panel.grid.major = element_line(color = "grey92", size = .4),
  panel.grid.minor = element_blank(),
  axis.title.x = element_text(color = "grey30", margin = margin(t = 7)),
  axis.title.y = element_text(color = "grey30", margin = margin(r = 7)),
  axis.text = element_text(color = "grey50"),
  axis.ticks =  element_line(color = "grey92", size = .4),
  axis.ticks.length = unit(.6, "lines"),
  legend.position = "top",
  plot.title = element_text(hjust = 0, color = "black", 
                            family = "Neutraface 2 Display Titling",
                            size = 21, margin = margin(t = 10, b = 35)),
  plot.subtitle = element_text(hjust = 0, face = "bold", color = "grey30",
                               family = "Neutraface Text Book Italic", 
                               size = 14, margin = margin(0, 0, 25, 0)),
  plot.title.position = "plot",
  plot.caption = element_text(color = "grey50", size = 10, hjust = 1,
                              family = "Neutraface Display Medium", 
                              lineheight = 1.05, margin = margin(30, 0, 0, 0)),
  plot.caption.position = "plot", 
  plot.margin = margin(rep(20, 4))
)

pal <- c("#FF8C00", "#A034F0", "#159090")
df_penguins <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv') %>% 
  mutate(species = if_else(species == "Adelie", "Adélie", species))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   species = col_character(),
##   island = col_character(),
##   bill_length_mm = col_double(),
##   bill_depth_mm = col_double(),
##   flipper_length_mm = col_double(),
##   body_mass_g = col_double(),
##   sex = col_character(),
##   year = col_double()
## )
df_peng_summary <-
  tribble(
    ~species, ~x, ~y,
    "Adélie", 34.7, 20.7,
    "Chinstrap", 55.7, 19,
    "Gentoo", 50.7, 13.6
  ) %>% 
  full_join(
    df_penguins %>% 
      group_by(species) %>% 
      summarize(across(
        contains("_"), 
        list(median = ~median(.x, na.rm = T), 
             sd = ~sd(.x, na.rm = T))
    ))
  ) %>% 
  mutate(label = glue::glue("Median length: {format(round(bill_length_mm_median, 1), nsmall = 1)} mm\nMedian depth: {format(round(bill_depth_mm_median, 1), nsmall = 1)} mm\nMedian body mass: {format(body_mass_g_median / 1000, nsmall = 1)} kg"))
## Joining, by = "species"
url <- "https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/man/figures/culmen_depth.png"
img <- png::readPNG(RCurl::getURLContent(url))
i1 <- grid::rasterGrob(img, interpolate = T)
scat <- 
  ggplot(df_penguins, aes(bill_length_mm, bill_depth_mm)) +
  geom_errorbar(
    data = df_peng_summary,
    aes(
      x = bill_length_mm_median,
      ymin = bill_depth_mm_median - bill_depth_mm_sd,
      ymax = bill_depth_mm_median + bill_depth_mm_sd,
      color = species, 
      color = after_scale(darken(color, .2, space = "combined"))
    ),
    inherit.aes = F,
    width = .8,
    size = .8
  ) +
  geom_errorbar(
    data = df_peng_summary,
    aes(
      y = bill_depth_mm_median,
      xmin = bill_length_mm_median - bill_length_mm_sd,
      xmax = bill_length_mm_median + bill_length_mm_sd,
      color = species, 
      color = after_scale(darken(color, .2, space = "combined"))
    ),
    inherit.aes = F,
    width = .5,
    size = .8
  ) +
  geom_point(
    aes(
      fill = species, 
      size = body_mass_g
    ), 
    shape = 21,
    color = "transparent",
    alpha = .3
  ) +
  geom_point(
    aes(
      size = body_mass_g
    ), 
    shape = 21,
    color = "white",
    fill = "transparent"
  ) +
  geom_text(
    data = df_peng_summary,
    aes(
      x = x, y = y, 
      label = species, 
      color = species
    ),
    family = "Neutraface Slab Display TT Titl",
    size = 5.6
  ) +
  geom_text(
    data = df_peng_summary,
    aes(
      x = x, y = y - .6, 
      label = label, 
      color = species,
      color = after_scale(lighten(color, .3))
    ),
    family = "Neutraface Slab Display TT Bold",
    size = 3.5,
    lineheight = .8
  ) +
  annotate(
    "text",
    x = 37.5, y = 14.85,
    label = "Bubble size represents\nindividual body mass",
    family = "Neutraface Text Book Italic",
    color = "grey50",
    size = 3,
    lineheight = .9
  ) +
  annotate(
    "text",
    x = 40.1, y = 21.95,
    label = "Pygoscelis adéliae (Adélie penguin)  •  P. antarctica (Chinstrap penguin)  •  P. papua (Gentoo penguin)\n\n\n\n\n\n\n\n",
    family = "Neutraface Text Book Italic",
    color = "black",
    size = 3.9
  ) +
  annotation_custom(i1, ymin = 19.95, ymax = 26.95, xmin = 52.4, xmax = 60.2) +
  coord_cartesian(clip = "off") +
  scale_x_continuous(
    limits = c(30, 60),
    breaks = seq(30, 60, by = 5),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(12, 22),
    breaks = seq(12, 22, by = 2),
    expand = c(0, 0)
  ) +
  scale_color_manual(
    values = pal,
    guide = F
  ) +
  scale_fill_manual(
    values = pal,
    guide = F
  ) +
  scale_size(
    name = "",
    breaks = 3:6 * 1000,
    labels = c("3 kg", "4 kg", "5 kg", "6 kg")
  ) +
  guides(size = guide_legend(label.position = "bottom", 
                             override.aes = list(color = pal[2], stroke = .8, fill = NA))) +
  theme(legend.position = c(.24, .21), legend.direction = "horizontal", legend.key.width = unit(.01, "lines"), legend.text = element_text(size = 8, family = "Neutraface Text Book Italic", color = "grey50")) +
  labs(
    x = "Bill length (mm)",
    y = "Bill depth (mm)",
    title = "Bill dimensions of brush-tailed penguins",
    subtitle = "A. Scatterplot of bill length versus bill depth (error bars show median +/- sd)"
  )
## Warning: Duplicated aesthetics after name standardisation: colour

## Warning: Duplicated aesthetics after name standardisation: colour

## Warning: Duplicated aesthetics after name standardisation: colour
df_rect <-
  tibble(
    xmin = c(-Inf, 2.46, 3.27),
    xmax = c(Inf, Inf, Inf),
    ymin = c(3, 2, 1),
    ymax = c(Inf, Inf, Inf)
  )

df_peng_iqr <- 
  df_penguins %>% 
  mutate(bill_ratio = bill_length_mm / bill_depth_mm) %>% 
  filter(!is.na(bill_ratio)) %>% 
  group_by(species) %>% 
  mutate(
    median = median(bill_ratio),
    q25 = quantile(bill_ratio, probs = .25),
    q75 = quantile(bill_ratio, probs = .75),
    n = n()
  ) %>% 
  ungroup() %>% 
  mutate(species_num = as.numeric(fct_rev(species))) 

url <- "https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/man/figures/lter_penguins.png"
img <- png::readPNG(RCurl::getURLContent(url))
i2 <- grid::rasterGrob(img, interpolate = T)
rain <- 
  ggplot(df_peng_iqr, aes(bill_ratio, species_num - .2)) +
  geom_rect(
    data = df_rect,
    aes(
      xmin = xmin, xmax = xmax,
      ymin = ymin, ymax = ymax
    ),
    inherit.aes = F,
    fill = "white"
  ) +
  geom_linerange(
    data = df_peng_iqr %>% 
      group_by(species, species_num) %>% 
      summarize(m = unique(median)),
    aes(
      xmin = -Inf, 
      xmax = m, 
      y = species_num,
      color = species
    ),
    inherit.aes = F,
    linetype = "dotted",
    size = .7
  ) +
  geom_boxplot(
    aes(
      color = species,
      color = after_scale(darken(color, .1, space = "HLS"))
    ),
    width = 0,
    size = .9
  ) +
  geom_rect(
    aes(
      xmin = q25,
      xmax = median,
      ymin = species_num - .05,
      ymax = species_num - .35
    ),
    fill = "grey89"
  ) +
  geom_rect(
    aes(
      xmin = q75,
      xmax = median,
      ymin = species_num - .05,
      ymax = species_num - .35
    ),
    fill = "grey79"
  ) +
  geom_segment(
    aes(
      x = q25, 
      xend = q25,
      y = species_num - .05,
      yend = species_num - .35,
      color = species,
      color = after_scale(darken(color, .05, space = "HLS"))
    ),
    size = .25
  ) +
  geom_segment(
    aes(
      x = q75, 
      xend = q75,
      y = species_num - .05,
      yend = species_num - .35,
      color = species,
      color = after_scale(darken(color, .05, space = "HLS"))
    ),
    size = .25
  ) +
  geom_point(
    aes(
      color = species
    ), 
    shape = "|",
    size = 5,
    alpha = .33
  ) +
  ggdist::stat_halfeye(
    aes(
      y = species_num,
      color = species,
      fill = after_scale(lighten(color, .5))
    ),
    shape = 18,
    point_size = 3,
    interval_size = 1.8,
    adjust = .5,
    .width = c(0, 1)
  ) +
  geom_text(
    data = df_peng_iqr %>% 
      group_by(species, species_num) %>% 
      summarize(m = unique(median)),
    aes(
      x = m, 
      y = species_num + .12,
      label = format(round(m, 2), nsmall = 2)
    ),
    inherit.aes = F,
    color = "white",
    family = "Neutraface Slab Display TT Titl",
    size = 3.5
  ) +
  geom_text(
    data = df_peng_iqr %>% 
      group_by(species, species_num) %>% 
      summarize(n = unique(n), max = max(bill_ratio, na.rm = T)),
    aes(
      x = max + .01, 
      y = species_num + .02,
      label = glue::glue("n = {n}"),
      color = species
    ),
    inherit.aes = F,
    family = "Neutraface Slab Display TT Bold",
    size = 3.5,
    hjust = 0
  ) +
  annotation_custom(i2, ymin = 2.5, ymax = 3.6, xmin = 3, xmax = 3.7) +
  coord_cartesian(clip = "off") +
  scale_x_continuous(
    limits = c(1.57, 3.7),
    breaks = seq(1.6, 3.6, by = .2),
    expand = c(.001, .001)
  ) +
  scale_y_continuous(
    limits = c(.55, NA),
    breaks = 1:3,
    labels = c("Gentoo", "Chinstrap", "Adélie"),
    expand = c(0, 0)
  ) +
  scale_color_manual(
    values = pal,
    guide = F
  ) +
  scale_fill_manual(
    values = pal,
    guide = F
  ) +
  labs(
    x = "Bill ratio",
    y = NULL,
    subtitle = "B. Distribution of the bill ratio, estimated as bill length divided by bill depth",
    caption = 'Note: In the original data, bill dimensions are recorded as "culmen length" and "culmen depth". The culmen is the dorsal (upper) ridge of a bird’s bill.\nVisualization: Cédric Scherer  •  Data: Gorman, Williams & Fraser (2014) DOI: 10.1371/journal.pone.0090081  •  Illustrations: Allison Horst'
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(family = "Neutraface Slab Display TT Titl", 
                               color = rev(pal), size = 14, lineheight = .9),
    axis.ticks.length = unit(0, "lines"),
    plot.subtitle = element_text(margin = margin(0, 0, -10, 0))
  )
## `summarise()` has grouped output by 'species'. You can override using the `.groups` argument.
## Warning: Duplicated aesthetics after name standardisation: colour

## Warning: Duplicated aesthetics after name standardisation: colour

## Warning: Duplicated aesthetics after name standardisation: colour
## `summarise()` has grouped output by 'species'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'species'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
path <- here::here("plots", "2020_31", "2020_31_PalmerPenguins")

plot_grid(scat, rain, ncol = 1, rel_heights = c(1, .75)) 
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Bold' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'Neutraface Text Book Italic' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface 2 Display Titling' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Text Book Italic' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Slab Display TT Titl' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Neutraface Display Medium' not found in PostScript font database

## packages
library(tidyverse)
library(ggforce)
library(ggtext)
library(extrafont)
library(showtext)

loadfonts()
font_add_google("Staatliches", "Staatliches")

## ggplot theme
theme_set(theme_minimal(base_family = "Britannic Bold"))

theme_update(axis.text = element_text(size = 14, color = "grey45"),
             legend.title = element_text(size = 16, color = "grey65", face = "bold"),
             legend.text = element_text(family = "Staatliches", size = 11, color = "grey45", face = "plain"),
             panel.grid = element_blank(),
             plot.background = element_rect(fill = "grey12", color = "grey12"),
             plot.margin = margin(30, 80, 10, 50),
             plot.title = element_markdown(size = 29, color = "grey97", 
                                           face = "plain", lineheight = 1.15),
             plot.title.position = "plot",
             plot.subtitle = element_text(color = "grey45", size = 15, face = "plain",
                                          margin = margin(t = 25, b = 15)),
             plot.caption = element_text(color = "grey35", size = 12, face = "bold",
                                          margin = margin(40, -300, 15, 0)),
             plot.caption.position = "plot")
df_grosses <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-28/grosses.csv', guess_max = 40000)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   week_ending = col_date(format = ""),
##   week_number = col_double(),
##   weekly_gross_overall = col_double(),
##   show = col_character(),
##   theatre = col_character(),
##   weekly_gross = col_double(),
##   potential_gross = col_double(),
##   avg_ticket_price = col_double(),
##   top_ticket_price = col_double(),
##   seats_sold = col_double(),
##   seats_in_theatre = col_double(),
##   pct_capacity = col_double(),
##   performances = col_double(),
##   previews = col_double()
## )
pre_1985_starts <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-28/pre-1985-starts.csv')
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   show = col_character(),
##   start_date = col_date(format = "")
## )
df_gross_show <- 
  df_grosses %>% 
  mutate(year = lubridate::year(week_ending)) %>% 
  filter(avg_ticket_price > 0) %>% 
  group_by(show) %>% 
  summarize(
    year = min(year),
    weekly_gross_overall = sum(weekly_gross_overall, na.rm = T),
    avg_ticket_price = mean(avg_ticket_price, na.rm = T),
    top_ticket_price = mean(top_ticket_price, na.rm = T),
    seats_sold = sum(seats_sold, na.rm = T)
  ) %>% 
  mutate(gross_per_seat = weekly_gross_overall / seats_sold) %>% 
  ungroup() %>% 
  left_join(pre_1985_starts) %>%
  filter(show != "Les Miserables") %>% 
  mutate(
    show = case_when(
      show == "Hamilton" ~ "A high ranked musical\nI never heard of.",
      show == "Springsteen On Broadway" ~ "I thought this guy plays concerts?",
      show == "The Lion King" ~ "Of course I've\nseen the movie!\nBut the musical?",
      show == "Chicago" ~ "Another high-ranked\none I had no idea of.",
      show == "Wicked" ~ "At least I know\nthe classic novel.",
      show == "The Phantom of the Opera" ~ "I love the music!",
      show == "Cats" ~ "Wouldnt even go\nthere for free...",
      show == "Marc Salem's Mind Games on Broadway" ~ "I even wrapped already\nall the long brodway\nshow names using the\ncool str_wrap() function\nbefore realizing it doesn't\nmake any sense at all.",
      show == "A Mom's Life" ~ "A very interesting outlier!\n(I thought.)",
      TRUE ~ " "
    ),
    year = if_else(!is.na(start_date), lubridate::year(start_date), year)
  )
## Joining, by = "show"
df_gross_show %>% 
  ggplot(aes(gross_per_seat, avg_ticket_price)) +
    geom_point(aes(size = seats_sold),
               shape = 21,
               fill = "grey10",
               color = "transparent") +
    geom_point(data = df_gross_show %>% 
                 filter(avg_ticket_price >= 220 | gross_per_seat >= 30000 | seats_sold >= 8.5*10^6),
               aes(fill = year, 
                   size = seats_sold),
               shape = 21,
               alpha = .2,
               color = "transparent") +
    geom_point(aes(fill = year, 
                   size = seats_sold),
               shape = 21,
               color = "transparent",
               alpha = .2) +
    geom_point(aes(color = year, 
                   color = after_scale(colorspace::darken(color, .05)),
                   size = seats_sold),
               shape = 21,
               fill = "transparent",
               stroke = .5) +
    geom_point(data = df_gross_show %>% 
                 filter(avg_ticket_price >= 220 | gross_per_seat >= 30000 | seats_sold >= 8.5*10^6),
               aes(color = year, 
                   color = after_scale(colorspace::lighten(color, .05)),
                   size = seats_sold),
               shape = 21,
               fill = "transparent",
               stroke = 1.1) +
    geom_mark_ellipse(data = df_gross_show %>% 
                        filter(avg_ticket_price >= 220 | gross_per_seat >= 30000 | seats_sold >= 8.5*10^6),
                      aes(group = show, 
                          label = show),
                      color = "transparent",
                      label.family = "Staatliches",
                      label.fontsize = 16,
                      label.fill = "#ffffff80",
                      label.buffer = unit(5, "mm"),
                      con.colour = "grey90",
                      con.size = .6,
                      con.cap = unit(0, "mm"),
                      expand = unit(5, "mm"),
                      con.type = "elbow") +
    geom_segment(data = tibble(x = c(350, 300), xend = c(80000, 300), 
                               y = c(6, 7), yend = c(6, 600)),
                 aes(x, y, xend = xend, yend = yend),
                 color = "grey45",
                 size = .6,
                 arrow = arrow(length = unit(.1, "inches"))) +
    geom_text(aes(85000, 6, 
                  label = "A non-sense ratio"),
              family = "Britannic Bold",
              color = "grey45",
              fontface = "plain",
              size = 5.5,
              hjust = 0) +
    geom_text(aes(300, 650, 
                  label = "Ticket price\n(confusing log-scale)"),
              family = "Britannic Bold",
              color = "grey45",
              fontface = "plain",
              lineheight = .9,
              size = 5.5,
              vjust = 0) +
    coord_cartesian(clip = "off") +
    scale_x_log10(expand = c(.001, .001),
                  limits = c(NA, 160000),
                  breaks = c(500, 5000, 50000),
                  labels = c("Non-sense\nvalue", "Non-sense\nvalue", "Non-sense\nvalue")) +
    scale_y_log10(expand = c(.001, .001),
                  limits = c(NA, 1000),
                  breaks = c(10, 30, 100, 300),
                  labels = c("$", "$$", "$$$$", "   $$$$$$$$")) +
    rcartocolor::scale_color_carto_c(palette = "Sunset", 
                                     guide = F) +
    rcartocolor::scale_fill_carto_c(palette = "Sunset", 
                                  name = "Year of first broadway show",
                                  breaks = c(1975, 1980, 1990, 2000, 2010, 2020)) +
    scale_radius(range = c(2, 12),
                 name = "Seats sold",
                 breaks = c(10000, 100000, 1000000, 5000000, 10000000),
                 labels = c("10,000  ", "100,000  ", "1 million  ", "5 million  ", "10 millions")) +
    guides(fill = guide_colorbar(direction = "horizontal",
                                 title.position = "top",
                                 title.hjust = .5,
                                 barwidth = unit(24.5, "lines"),
                                 barheight = unit(.4, "lines")),
           size = guide_legend(direction = "horizontal",
                               title.position = "top",
                               title.hjust = .5,
                               override.aes = list(fill = "#FAA476", 
                                                   color = "#FAA476"))) +
    labs(x = NULL, y = NULL,
         title = "<b style='font-size:36pt;'>This could be an insightful plot... if only I knew something about broadways.</b><br><span style='color:#b0b0b0'>a.k.a. a friendly reminder to myself that I should </span><span style='color:#f3e79b;'>read</span> <span style='color:#fab27f;'>the</span> <span style='color:#eb7f86;'>f**cking</span> <span style='color:#b95e9a;'>manual",
         subtitle = "This week's #TidyTuesday challenge is on Broadway Musicals—a topic I know nothing about—and I decided to ignore the glossary and jump right in.\nTurns out I've spent 3 hours to explore the data and polish the plot just to find out it makes no freaking sense at all.",
         caption = "Visualization by Cédric Scherer  •  Data by Playbill") +
    theme(legend.position = c(.83, .83),
          legend.box = "vertical")

## packages
library(tidyverse)
library(rnaturalearth)
library(systemfonts)
library(ggtext)
library(ggsci)
library(pdftools)

theme_set(theme_void(base_family = "Roboto Condensed", base_size = 9))

theme_update(
  axis.text.x = element_text(color = "grey60", margin = margin(t = 4)),
  axis.ticks.x = element_line(color = "grey60"),
  axis.ticks.length.x = unit(.4, "lines"),
  legend.position = "none",
  panel.grid = element_blank(),
  plot.margin = margin(35, 25, 15, 35),
  plot.background = element_rect(fill = "grey98", color = "grey98"),
  plot.caption = element_text(family = "Roboto", color = "grey60", 
                              size = 8, margin = margin(t = 30, r = 50))
)
df_mobile <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-11-10/mobile.csv')
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   entity = col_character(),
##   code = col_character(),
##   year = col_double(),
##   total_pop = col_double(),
##   gdp_per_cap = col_double(),
##   mobile_subs = col_double(),
##   continent = col_character()
## )
df_countries <- 
  ne_countries(scale = 110, returnclass = "sf") %>% 
  as_tibble()

df_mobile_sub <-
  df_mobile %>% 
  filter(year < max(year)) %>% 
  left_join(df_countries, by = c("code" = "iso_a3")) %>% 
  group_by(entity, year) %>% 
  slice(1) %>%
  ungroup %>% 
  add_count(entity) %>% 
  filter(n == max(n), !is.na(subregion)) %>% 
  dplyr::select(entity, year, mobile_subs, continent.x, continent.y, subregion, n) %>% 
  group_by(continent.x, subregion, year) %>% 
  summarize(mobile_subs = mean(mobile_subs, na.rm = TRUE)) %>% 
  arrange(year, continent.x, subregion) %>% 
  group_by(continent.x, year) %>% 
  mutate(
    id = row_number(), 
    alpha = id / max(id),
    subregion = str_replace(subregion, "\\band\\b", "&")
  ) %>% 
  ungroup
## `summarise()` has grouped output by 'continent.x', 'subregion'. You can override using the `.groups` argument.
df_mobile_end <-
  df_mobile_sub %>% 
  filter(year == max(year)) %>% 
  group_by(continent.x) %>% 
  mutate(end_cont = mean(mobile_subs[which(year == 2016)])) %>% 
  group_by(subregion) %>% 
  mutate(end_sub = mobile_subs[which(year == 2016)]) %>% 
  arrange(-end_cont, -end_sub) %>% 
  ungroup %>% 
  mutate(id_sort = row_number()) %>% 
  dplyr::select(subregion, id_sort, end_cont, end_sub)

df_mobile_fct <-
  df_mobile_sub %>% 
  left_join(df_mobile_end, by = c("subregion")) %>% 
  mutate(
    continent.x = fct_reorder(continent.x, -end_cont),
    subregion = fct_reorder(subregion, id_sort),
    continent_rev = fct_rev(continent.x)
  )
ggplot(
  df_mobile_fct,
  aes(year, mobile_subs, 
      group = subregion, 
      alpha = -alpha)
  ) +
  geom_area(aes(
      fill = continent.x#,
      #color = after_scale(fill),#colorspace::lighten(fill, .4))
    ),
    position = "stack",
    size = 0
  ) +
  geom_area(
    data = df_mobile_fct %>% group_by(continent.x, year) %>% 
      summarize(mobile_subs = sum(mobile_subs)),
    aes(
      year, mobile_subs,
      group = continent.x,
      color = continent.x,
      color = after_scale(colorspace::darken(color, .2, space = "HLS"))
    ),
    inherit.aes = FALSE,
    position = "stack",
    fill = "transparent",
    size = .9
  ) +
  annotate(
    geom = "rect", 
    xmin = 2016, xmax = Inf, 
    ymin = -Inf, ymax = Inf,
    fill = "grey98",
    size = 0
  ) +
  geom_richtext(
    data = df_mobile_fct %>% filter(year == 2014) %>% 
      group_by(continent_rev, year) %>% 
      summarize(mobile_subs = sum(mobile_subs)),
    aes(year, mobile_subs, 
        label = continent_rev,
        vjust = mobile_subs / 130),
    inherit.aes = FALSE,
    position = "stack",
    family = "Rockwell",
    size = 6.5,
    fontface = "bold",
    color = "white",
    label.colour = NA,
    fill = NA
  ) +
  annotate(
    "text", x = 2014, y = 1830,
    label = "Regions of:",
    color = "grey85",
    size = 3.5,
    family = "Rockwell"
  ) +
  geom_richtext(
    data = df_mobile_fct %>% filter(year == 2016),
    aes(year, mobile_subs, group = subregion,
        label = glue::glue("<b style='font-size:9pt;'>{subregion}</b><br>{round(mobile_subs, 1)} per 100 persons"),
        color = continent.x,
        nudge_x = 1,
        hjust = 0,
        vjust = .9),
    inherit.aes = FALSE,
    position = "stack",
    family = "Roboto Condensed",
    size = 2.3,
    label.colour = NA,
    fill = NA
  ) +
  geom_richtext(
    data = tibble(year = 1998.7, mobile_subs = 1650),
    aes(year, mobile_subs,
        label = glue::glue("<b style='font-size:20pt;color:black;font-family:rockwell;'>Nowadays, in many regions of the world<br>there are more mobile subscriptions than people</b><br><br><br><span style='font-size:9pt'>In the last decades, the number of mobile devices has grown rapidly across the world.<br>In 2016, in 13 out of 20 regions of the world the number of subscriptions exceeded more than one<br>mobile phone per person, but different countries and regions have had varying rates of adoption.<br><br></span>The stacked area chart shows the **average number of mobile subscriptions per region** over a timperiod of 27 years. Note that<br>consequently the width of all regions together **does not** represent the average number of subscriptions per continent.")),
    inherit.aes = FALSE,
    position = "stack",
    family = "Roboto",
    size = 2.8,
    color = "grey40",
    label.colour = NA,
    fill = NA,
    lineheight = 1.6
  ) +
  # geom_vline(
  #   data = tibble(x = 1991:2015),
  #   aes(xintercept = x), 
  #   inherit.aes = FALSE,
  #   color = "white",
  #   #linetype = "dotted",
  #   size = .2
  # ) +
  coord_cartesian(clip = "off") +
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(1990, 2019.5),
    breaks = 1990:2016
  ) +
  scale_y_continuous(
    expand = c(.007, .007)
  ) +
  scale_color_uchicago() +
  scale_fill_uchicago() +
  scale_alpha(range = c(.5, 1)) +
  labs(caption = "Visualization by Cédric Scherer  •  Data by OurWorldInData.org")
## `summarise()` has grouped output by 'continent.x'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'continent_rev'. You can override using the `.groups` argument.

# https://www.data-imaginist.com/2019/the-ggforce-awakens-again/
# https://www.data-imaginist.com/2019/entering-and-exiting-2018/
# https://www.data-imaginist.com/2018/what-are-we-plotting-what-are-we-animating/
# https://www.data-imaginist.com/2018/let-it-flow-let-it-flow-let-it-flow/
# Data is beautifugraph
gdp    <- readr::read_csv("https://raw.githubusercontent.com/amrrs/animated_bar_charts_in_R/master/data/GDP_Data.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `Series Name` = col_character(),
##   `Series Code` = col_character(),
##   `Country Name` = col_character(),
##   `Country Code` = col_character(),
##   `1990 [YR1990]` = col_character(),
##   `2000 [YR2000]` = col_character(),
##   `2009 [YR2009]` = col_character(),
##   `2010 [YR2010]` = col_character(),
##   `2011 [YR2011]` = col_character(),
##   `2012 [YR2012]` = col_character(),
##   `2013 [YR2013]` = col_character(),
##   `2014 [YR2014]` = col_character(),
##   `2015 [YR2015]` = col_character(),
##   `2016 [YR2016]` = col_character(),
##   `2017 [YR2017]` = col_character(),
##   `2018 [YR2018]` = col_character()
## )
library(tidyverse)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# gdp <- read_csv("./data/GDP_Data.csv")
#select required columns
gdp <- gdp %>% select(3:15) 
#filter only country rows
gdp <- gdp[1:217,]
gdp_tidy <- gdp %>% 
  mutate_at(vars(contains("YR")),as.numeric)%>% 
  gather(year,value,3:13) %>% 
  janitor::clean_names() %>% 
  mutate(year = as.numeric(stringr::str_sub(year,1,4)))
# write_csv(gdp_tidy,"./data/gdp_tidy.csv")

library(tidyverse)
library(gganimate)


gdp_formatted <- gdp_tidy %>%
  group_by(year) %>%
  # The * 1 makes it possible to have non-integer ranks while sliding
  mutate(rank = rank(-value),
         Value_rel = value/value[rank==1],
         Value_lbl = paste0(" ",round(value/1e9))) %>%
  group_by(country_name) %>% 
  filter(rank <=10) %>%
  ungroup()


staticplot = ggplot(gdp_formatted, aes(rank, group = country_name, 
                                       fill = as.factor(country_name), color = as.factor(country_name))) +
  geom_tile(aes(y = value/2,
                height = value,
                width = 0.9), alpha = 0.8, color = NA) +
  geom_text(aes(y = 0, label = paste(country_name, " ")), vjust = 0.2, hjust = 1) +
  geom_text(aes(y=value,label = Value_lbl, hjust=0)) +
  coord_flip(clip = "off", expand = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_reverse() +
  guides(color = FALSE, fill = FALSE) +
  theme(axis.line=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        legend.position="none",
        panel.background=element_blank(),
        panel.border=element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        panel.grid.major.x = element_line( size=.1, color="grey" ),
        panel.grid.minor.x = element_line( size=.1, color="grey" ),
        plot.title=element_text(size=25, hjust=0.5, face="bold", colour="grey", vjust=-1),
        plot.subtitle=element_text(size=18, hjust=0.5, face="italic", color="grey"),
        plot.caption =element_text(size=8, hjust=0.5, face="italic", color="grey"),
        plot.background=element_blank(),
        plot.margin = margin(2,2, 2, 4, "cm"))


staticplot + transition_states(year, transition_length = 4, state_length = 1) +
  view_follow(fixed_x = TRUE)  +
  labs(title = 'GDP per Year : {closest_state}',  
       subtitle  =  "Top 10 Countries",
       caption  = "GDP in Billions USD | Data Source: World Bank Data")

#  Awesome addition below

  # shadow_mark(colour = "grey70") 
library(datasauRus)
library(ggplot2)
library(gganimate)
data('datasaurus_dozen')

ggplot(datasaurus_dozen, aes(x=x, y=y))+
  geom_point()+
  theme_minimal() +
  transition_states(dataset) + 
  ease_aes('cubic-in-out')

# devtools::install_github("clauswilke/ungeviz")
data(BlueJays, package = "Stat2Data")
bs <- ungeviz::bootstrapper(20, KnownSex)

ggplot(BlueJays, aes(BillLength, Head, color = KnownSex)) +
  geom_smooth(method = "lm", color = NA) +
  #run linear model on the orginal dataset,donot color regression line, 
  #showing only the confidence interval of the model.
  geom_point(alpha = 0.3) +
  # `.row` is a generated column providing a unique row number
  # to all rows in the bootstrapped data frame 
  geom_point(data = bs, aes(group = .row)) +
  geom_smooth(data = bs, method = "lm", fullrange = TRUE, se = FALSE) + #se is confidence interval
  facet_wrap(~KnownSex, scales = "free_x") +
  scale_color_manual(values = c(F = "#D55E00", M = "#0072B2"), guide = "none") +
  theme_bw() +
  transition_states(.draw, 1, 1) + 
  enter_fade() + 
  exit_fade()
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# good for visualization but boot strap thing doesnt make that statistic sense, just a gimmick , but we can make it useful, if we create a custom dataset

library(ggplot2)
library(dplyr)
library(forcats)
library(ungeviz)
library(gganimate)

cacao %>% filter(location %in% c("Canada", "U.S.A.")) %>%
  ggplot(aes(rating, location)) +
  geom_point(
    position = position_jitter(height = 0.3, width = 0.05), 
    size = 0.4, color = "#0072B2", alpha = 1/2
  ) +
  geom_vpline(data = sampler(25, group = location), height = 0.6, color = "#D55E00") +
  theme_bw() + 
  # `.draw` is a generated column indicating the sample draw
  transition_states(.draw, 1, 3)

 

Predictive Maintenance Prefab

Mu Sigma LABS